Workshop EDSD

# Packages used in this workshop.
packages <- c(
  # working with spatial data
  "sf",
  # easy access to world country boundaries
  "spData",
  # converting between ISO2 and ISO3 country codes
  "countrycode",
  # interactive flow mapping
  "flowmapblue",
  # static flow mapping using ggplot2
  "flowmapper"
)
# Uncomment to install any packages that are missing:
# install.packages(packages)
library(sf)
Warning: package 'sf' was built under R version 4.4.3
Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(spData)
Warning: package 'spData' was built under R version 4.4.3
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(countrycode)
Warning: package 'countrycode' was built under R version 4.4.3
library(flowmapblue)
Warning: package 'flowmapblue' was built under R version 4.4.3
library(flowmapper)
Warning: package 'flowmapper' was built under R version 4.4.3
library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the country-to-country flows table from the processed-data folder.
scopus_country_flows <- read_csv(
  file = "data_processed/scopus_2024_V1_scholarlymigration_countryflows_enriched.csv"
)
Rows: 92980 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): countrynamefrom, countrynameto, regionfrom, regionto, incomelevelfr...
dbl (8): n_migrations, year, gdp_per_capitafrom, gdp_per_capitato, populatio...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Country polygons shipped with the spData package.
countries <- spData::world

# Draw the country polygons on their own.
ggplot(data = countries) +
  geom_sf()

# Country polygons with one point per country drawn on top; each point is
# computed by st_point_on_surface() from the country's geometry.
ggplot() +
  geom_sf(data = countries) +
  geom_sf(
    data = st_point_on_surface(x = countries),
    colour = "darkred"
  )
Warning: st_point_on_surface assumes attributes are constant over geometries
Warning in st_point_on_surface.sfc(st_geometry(x)): st_point_on_surface may not
give correct results for longitude/latitude data

# Centroid coordinates of every country, as a data.frame with two columns
# renamed to `lon` and `lat`.
centroid_coords <- setNames(
  as.data.frame(
    st_coordinates( # numeric coordinates of the POINT geometries (a matrix)
      st_centroid(countries) # centre of each country's polygon
    )
  ),
  c("lon", "lat")
)

# Bind the columns of `countries` next to the coordinates (rows are in the
# same order), then keep only the columns that are needed later on.
countries_centroids <- select(
  cbind(centroid_coords, countries),
  iso_a2, lon, lat, name_long
)
Warning: st_centroid assumes attributes are constant over geometries
# Quick look at the structure of the centroid table.
countries_centroids |>
  glimpse()
Rows: 177
Columns: 4
$ iso_a2    <chr> "FJ", "TZ", "EH", "CA", "US", "KZ", "UZ", "PG", "ID", "AR", …
$ lon       <dbl> 178.56842, 34.74198, -12.18574, -96.39551, -103.57290, 67.23…
$ lat       <dbl> -17.3156217, -6.2505643, 24.2783849, 60.4767578, 44.7559814,…
$ name_long <chr> "Fiji", "Tanzania", "Western Sahara", "Canada", "United Stat…
library(countrycode)

# Add two-letter (ISO2) country codes for the origin and the destination of
# every flow, converted from the existing three-letter (ISO3) code columns.
# This assumes the flows data.frame is called `scopus_country_flows`.
scopus_country_flows <- mutate(
  scopus_country_flows,
  iso2codefrom = countrycode(
    iso3codefrom,
    origin = "iso3c", destination = "iso2c"
  ),
  iso2codeto = countrycode(
    iso3codeto,
    origin = "iso3c", destination = "iso2c"
  )
)
# Centroids joined with the flows that originate in each country. Kept for
# any later use; the flow map below is built from the two separate tables
# that flowmapblue() expects.
centroids_flows <- countries_centroids |>
  left_join(scopus_country_flows, join_by(iso_a2 == iso2codefrom))

# Locations table for flowmapblue(): one row per country with the columns
# `id`, `name`, `lat` and `lon`. Countries without an ISO2 code cannot be
# referenced by any flow, so they are dropped.
flowmap_locations <- countries_centroids |>
  filter(!is.na(iso_a2)) |>
  distinct(iso_a2, .keep_all = TRUE) |>
  transmute(id = iso_a2, name = name_long, lat, lon)

# Flows table for flowmapblue(): one row per origin-destination pair with the
# columns `origin`, `dest` and `count`. Only flows whose two endpoints exist in
# the locations table are kept, and the counts of rows sharing the same pair
# (e.g. different years) are summed.
flowmap_flows <- scopus_country_flows |>
  filter(
    iso2codefrom %in% flowmap_locations$id,
    iso2codeto %in% flowmap_locations$id
  ) |>
  summarise(
    count = sum(n_migrations, na.rm = TRUE),
    .by = c(iso2codefrom, iso2codeto)
  ) |>
  rename(origin = iso2codefrom, dest = iso2codeto)

# Interactive flow map; the Mapbox token is read from the MAPBOX_TOKEN
# environment variable.
flowmap <- flowmapblue(
  locations = flowmap_locations,
  flows = flowmap_flows,
  mapboxAccessToken = Sys.getenv("MAPBOX_TOKEN"),
  darkMode = TRUE,
  animation = FALSE,
  clustering = TRUE
)

flowmap